from transformers import AutoTokenizer, AutoModel
# Local directory holding the ChatGLM2-6B checkpoint. Raw string so the
# Windows backslashes are not interpreted as escape sequences.
modeid = r'E:\llama\text-generation-webui\models\chatglm2-6b'

# trust_remote_code=True allows transformers to execute the custom Python
# code that ships alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(modeid, trust_remote_code=True)

# Load the weights, cast them to half precision, move them to the GPU and
# switch the module to evaluation mode (eval() returns the module itself).
model = (
    AutoModel.from_pretrained(modeid, trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)

# Two-turn conversation: the history returned by each chat() call is passed
# to the next call, and each reply is printed as it arrives.
history = []
for prompt in ("你好", "晚上睡不着应该怎么办"):
    response, history = model.chat(tokenizer, prompt, history=history)
    print(response)
